@@ -47,6 +47,8 @@ module Agents |
||
47 | 47 |
|
48 | 48 |
Set `force_encoding` to an encoding name if the website does not return a Content-Type header with a proper charset. |
49 | 49 |
|
50 |
+ Set `user_agent` to a custom User-Agent string if the website does not accept the default value ("Faraday v#{Faraday::VERSION}"). |
|
51 |
+ |
|
50 | 52 |
The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload. |
51 | 53 |
MD |
52 | 54 |
|
@@ -105,6 +107,10 @@ module Agents |
||
105 | 107 |
end |
106 | 108 |
end |
107 | 109 |
|
110 |
+ if options['user_agent'].present? |
|
111 |
+ errors.add(:base, "user_agent must be a string") unless options['user_agent'].is_a?(String) |
|
112 |
+ end |
|
113 |
+ |
|
108 | 114 |
begin |
109 | 115 |
basic_auth_credentials() |
110 | 116 |
rescue => e |
@@ -281,6 +287,10 @@ module Agents |
||
281 | 287 |
|
282 | 288 |
def faraday |
283 | 289 |
@faraday ||= Faraday.new { |builder| |
290 |
+ if (user_agent = options['user_agent']).present? |
|
291 |
+ builder.headers[:user_agent] = user_agent |
|
292 |
+ end |
|
293 |
+ |
|
284 | 294 |
builder.use FaradayMiddleware::FollowRedirects |
285 | 295 |
builder.request :url_encoded |
286 | 296 |
if userinfo = basic_auth_credentials() |
@@ -376,4 +376,35 @@ describe Agents::WebsiteAgent do |
||
376 | 376 |
end |
377 | 377 |
end |
378 | 378 |
end |
379 |
+ |
|
380 |
+ describe "checking with User-Agent" do |
|
381 |
+ before do |
|
382 |
+ stub_request(:any, /example/). |
|
383 |
+ with(headers: { 'User-Agent' => 'Sushi' }). |
|
384 |
+ to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200) |
|
385 |
+ @site = { |
|
386 |
+ 'name' => "XKCD", |
|
387 |
+ 'expected_update_period_in_days' => 2, |
|
388 |
+ 'type' => "html", |
|
389 |
+ 'url' => "http://www.example.com", |
|
390 |
+ 'mode' => 'on_change', |
|
391 |
+ 'extract' => { |
|
392 |
+ 'url' => { 'css' => "#comic img", 'attr' => "src" }, |
|
393 |
+ 'title' => { 'css' => "#comic img", 'attr' => "alt" }, |
|
394 |
+ 'hovertext' => { 'css' => "#comic img", 'attr' => "title" } |
|
395 |
+ }, |
|
396 |
+ 'user_agent' => "Sushi" |
|
397 |
+ } |
|
398 |
+ @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @site) |
|
399 |
+ @checker.user = users(:bob) |
|
400 |
+ @checker.save! |
|
401 |
+ end |
|
402 |
+ |
|
403 |
+ describe "#check" do |
|
404 |
+ it "should check for changes" do |
|
405 |
+ lambda { @checker.check }.should change { Event.count }.by(1) |
|
406 |
+ lambda { @checker.check }.should_not change { Event.count } |
|
407 |
+ end |
|
408 |
+ end |
|
409 |
+ end |
|
379 | 410 |
end |